####################################################
### Make Data Objects for Exchange Data Analysis ###
####################################################

##### Objects we need to make

	# Choice object: households by plans, including uninsured (what about households with multiple plans?)
		# 1 for selected plans
		# 0 for non-selected plans in choice set
		# NA for plans not in choice set
	# Premium object: households by plans
	# Households object: household characteristics
	# Plan object: plan characteristics


##### Load Data

#memory.limit(60000)
setwd("C:/Users/eas24f/OneDrive - Florida State University/Research/Inertia RESTAT/Data")# Office computer directory

data <- get(load("ca_enrollment_data_AUG022019")) # "Cleaned" Covered California enrollment data
households <- get(load("ca_household_data_AUG022019")) # "Cleaned" Covered California household data
#plan_data <- get(load("ca_plan_data_OCT052018")) # Covered California plan data
plan_data <- read.csv("ca_plan_data2.csv") # Covered California plan data
age_rating_factors <- read.csv("age_rating_factors.csv",row.names=1) # CCIIO default rating curve
poverty_guidelines <- read.csv("poverty_guidelines.csv",row.names=1) # Poverty guidelines
rating_areas <- read.csv("ca_rating_areas.csv",row.names = 1) # California county-rating area mapping
zip3_choices <- read.csv("zip3_choices2.csv",row.names = 1) # choice set by 3 digit zip and rating area
product_definitions <- read.csv("product_definitions.csv",row.names = 1) # definitions of column names in zip3_choices
contribution_percentages <- read.csv("contribution_percentages.csv",row.names = 1) # ACA contribution percentages
outside_logit <- get(load("sipp_logit"))

outside_sample <- TRUE
set.seed(2)
output_for_ashley <- FALSE

households <- households[!households$flagged,]
data <- data[!data$flagged,]

gc()

##### Predict who left market
	
	if(outside_sample) {
		
		# Add variables
		
			# Income brackets
			households$FPL_bracket <- "138orless"
			households[households$FPL > 1.38 & households$FPL <= 2.50,"FPL_bracket"] <- "138to250"
			households[households$FPL > 2.50 & households$FPL <= 4,"FPL_bracket"] <- "250to400"
			households[households$FPL > 4,"FPL_bracket"] <- "400ormore"
		
			# Age Brackets
			households$perc_18to34 <- households$perc_18to25 + households$perc_26to34
			households$perc_35to54 <- households$perc_35to44 + households$perc_45to54
			
		# Determine in/out of market
		households$out_of_market <- 0
		out_of_market_predictions <- as.numeric(predict(outside_logit,newdata=households[is.na(households$plan_number_nocsr),],type="response") >=
			runif(nrow(households[is.na(households$plan_number_nocsr),])))
		households[is.na(households$plan_number_nocsr),"out_of_market"] <- out_of_market_predictions
		
		# Save records out of market
		households_out <- households[households$out_of_market == 1,]
		data_out <- data[data$household_year %in% rownames(households_out),]
		
		# Remove records from market - About 26.8% of uninsured records are kept 
		households <- households[households$out_of_market == 0,]
		data <- data[data$household_year %in% rownames(households),]
	}
	
	gc()
	
#### Take Sample (5% of households, not records)
	
	set.seed(6)
	sample_size <- round(.05 * length(unique(households$household_id)))
	keep_household_ids <- sort(sample(unique(households$household_id),size=sample_size,replace=FALSE))
	keep_records <- which(households$household_id %in% keep_household_ids) 
	households <- households[keep_records,]
	data <- data[data$household_year %in% rownames(households),]	
	
	# Add back records when they were out of market
	households_out <- households_out[households_out$household_id %in% keep_household_ids,]
	data_out <- data_out[data_out$household_year %in% rownames(households_out),]
	households <- rbind(households,households_out)
	data <- rbind(data,data_out)
	
	rm(households_out)
	rm(data_out)
	gc()
	
#### Adjust previous choice variable

	if(!output_for_ashley) {
		# Households in the market now have a previous choice (unless they are currently uninsured)
		households[households$out_of_market == 0 & is.na(households$previous_plan_number) & 
			!is.na(households$plan_number_nocsr) & households$year > 2014,"previous_plan_number"] <- 
		households[households$out_of_market == 0 & is.na(households$previous_plan_number) & 
			!is.na(households$plan_number_nocsr) & households$year > 2014,"plan_number_nocsr"]

		# Households out of the market now have a previous choice (unless they were most recently uninsured)

		fill_indices <- which(households$out_of_market == 1 & is.na(households$previous_plan_number) & households$year > 2014)
		for(i in fill_indices) {
			household_records <- which(households$out_of_market == 0 & households$household_id == households[i,"household_id"])
			most_proximate_year <- household_records[which.min(abs(households[household_records,"year"] - households[i,"year"]))]
			households[i,"previous_plan_number"] <- households[most_proximate_year,"plan_number_nocsr"]
			
			#if(abs(households_all[most_proximate_year,"year"] - households_out[i,"year"]) > 1) {
			#		households[i,"previous_plan_number"] <- -1
			#} else {
			#	households[i,"previous_plan_number"] <- households[most_proximate_year,"plan_number_nocsr"]
			#}
		}
	}
	
	# Drop out-of-market records more than 1 year removed (e.g., a 2017 record if they were last enrolled in 2015)
	#households_out <- households_out[households_out$previous_plan_number > 0 | is.na(households_out$previous_plan_number),]
	
##### Choice Matrix (households by plans)
	# 1 if household selected plan, 0 if household did not select plan, NA if plan not in choice set
	# Complication: households that selected multiple plans
	
	# NOTE: I am reordering plan data because I defined a plan_id number earlier based on a previous ordering
	plan_data$Plan_ID_Order <- as.character(plan_data$Plan_ID_Order)
	rownames(plan_data) <- paste(plan_data$Plan_Name,plan_data$ENROLLMENT_YEAR,
		plan_data$region,sep="")
	plan_data <- plan_data[plan_data$Plan_ID_Order,]
	
	plan_names <- sort(unique(plan_data$Plan_Name2))
	plan_numbers <- 1:length(plan_names)
	names(plan_numbers) <- plan_names
	#data$plan_number <- NA
	#data[!is.na(data$plan_id),"plan_number"] <- plan_numbers[data[!is.na(data$plan_id),"plan_name"]]
	data[is.na(data$plan_id),"plan_number"] <- max(data$plan_number,na.rm=TRUE) + 1
	plan_data$plan_number <- plan_numbers[plan_data$Plan_Name2]
	
	# Assign an integer to each plan (eliminating small plans
	plan_names_small <- sort(unique(plan_data$Plan_Name_Small))
	plan_numbers_small <- 1:length(plan_names_small)
	names(plan_numbers_small) <- plan_names_small
	#data$plan_number_small <- NA
	#data[!is.na(data$plan_id),"plan_number_small"] <- plan_numbers_small[data[!is.na(data$plan_id),"plan_name"]]
	#data[is.na(data$plan_id),"plan_number_small"] <- max(data$plan_number_small,na.rm=TRUE) + 1
	plan_data$plan_number_small <- plan_numbers_small[plan_data$Plan_Name_Small]
	
	# In this version, we are going to eliminate CSR, HSA, and Catastrophic plans
	plan_names_nohsacat <- sort(unique(plan_data$Plan_Name_Small_NOHSACAT))
	plan_numbers_nohsacat <- 1:length(plan_names_nohsacat)
	names(plan_numbers_nohsacat) <- plan_names_nohsacat
	plan_data$plan_number_nohsacat <- plan_numbers_nohsacat[plan_data$Plan_Name_Small_NOHSACAT]
	
	# This will link the full plan number to the nohsacat plan number
	reference_numbers <- plan_numbers
	for(j in 1:length(reference_numbers)){
		reference_plan_name <- unique(plan_data[which(plan_data$Plan_Name == names(plan_numbers)[j]),"Plan_Name_Small_NOHSACAT"]) 
		reference_numbers[j] <- plan_numbers_nohsacat[reference_plan_name]
	}
	
	plan_data$Issuer_Name <- as.character(plan_data$Issuer_Name)
	plan_data[plan_data$Issuer_Name == "Anthem Blue Cross","Issuer_Name"] <- "Anthem"
	plan_data[plan_data$Issuer_Name == "Blue Shield","Issuer_Name"] <- "Blue_Shield"
	plan_data[plan_data$Issuer_Name == "Chinese Community","Issuer_Name"] <- "Chinese_Community"
	plan_data[plan_data$Issuer_Name == "Contra Costa Health Plan","Issuer_Name"] <- "Contra_Costa"
	plan_data[plan_data$Issuer_Name == "Health Net","Issuer_Name"] <- "Health_Net"
	plan_data[plan_data$Issuer_Name == "LA Care","Issuer_Name"] <- "LA_Care"
	plan_data[plan_data$Issuer_Name == "UnitedHealthcare","Issuer_Name"] <- "United"
	plan_data[plan_data$Issuer_Name == "Western Health","Issuer_Name"] <- "Western"
	plan_data[plan_data$Issuer_Name == "Sharp","Issuer_Name"] <- "SHARP"
	
	zipchoices <- as.matrix(zip3_choices[,4:ncol(zip3_choices)])
	product_definitions$insurer <- as.character(product_definitions$insurer)
	product_definitions$plan_network_type <- as.character(product_definitions$plan_network_type)
	single_product_insurers <- c("Chinese_Community","Contra_Costa","Kaiser","LA_Care","Molina",
				"Oscar","United","Valley","Western")
	large_insurers <- c("Anthem","Blue_Shield","Kaiser","Health_Net","Molina")
	#households$zip_region_year <- paste(households$zip_3,households$rating_area,households$year,sep="_")			
				
	# Initialize choice and new plans matrix
	choices <- matrix(NA,nrow=dim(households)[1],ncol=length(plan_names_nohsacat),dimnames=list(rownames(households),plan_names_nohsacat))	
	new_choices <- matrix(0,nrow=dim(households)[1],ncol=length(plan_names_nohsacat),dimnames=list(rownames(households),plan_names_nohsacat))	

	# Households
	households$change_in_products <- 0
	households$change_in_insurers <- 0
	households$change_in_large_insurers <- 0
	households$market_turnover <- 0
	
	# Choices for the exchange population for which we have 3-digit zip (plan has to be offered in year and zip)
	for(i in rownames(zip3_choices)) {
		
			t <- zip3_choices[i,"Year"]
		
			# Get the broad product categories available (insurer/network type/etc. - no metal)
			region_year_plans <- which(plan_data$ENROLLMENT_YEAR == t &
				plan_data$region == zip3_choices[i,"Region"])
			available_products <- zipchoices[i,]
			available_products <- names(available_products)[!is.na(available_products)]
			available_insurers <- unique(product_definitions[available_products,"insurer"])
			large_available_insurers <- intersect(large_insurers,available_insurers)
			available_plans <- c()
			
			# Last year's plans
			if(t > 2014) {
				lastyr_zip <- paste(zip3_choices[i,"zip3"],zip3_choices[i,"Region"],t-1,sep="_")
				lastyr_region_year_plans <- which(plan_data$ENROLLMENT_YEAR == t-1 &
					plan_data$region == zip3_choices[lastyr_zip,"Region"])
				lastyr_available_products <- zipchoices[lastyr_zip,]
				lastyr_available_products <- names(lastyr_available_products)[!is.na(lastyr_available_products)]
				lastyr_available_insurers <- unique(product_definitions[lastyr_available_products,"insurer"])
				lastyr_large_available_insurers <- intersect(large_insurers,lastyr_available_insurers)
				lastyr_available_plans <- c()
			}
			
			# Now get the specific plans available
						
			if ("Anthem" %in% available_insurers) {
				if("Anthem_HMO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "HMO" &
							plan_data$MSP == 0)))
				}
				if("Anthem_EPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
							plan_data$MSP == 0)))
				}
				if("Anthem_PPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
							plan_data$MSP == 0)))
				}
				if("Anthem_EPO_MSP" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
							plan_data$MSP == 1)))
				}
				if("Anthem_PPO_MSP" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
							plan_data$MSP == 1)))
				}
			} 
			if ("Blue_Shield" %in% available_insurers) {
				# could be HMO, EPO, PPO
				if("Blue_Shield_HMO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "HMO" )))
				}
				if("Blue_Shield_EPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "EPO" )))
				}
				if("Blue_Shield_PPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "PPO" )))
				}
			} 
			if ("Health_Net" %in% available_insurers) {
				# could be HMO, HSP, EPO, PPO
				if("Health_Net_HMO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HMO")))
				}
				if("Health_Net_HSP" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HSP")))
				}
				if("Health_Net_EPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "EPO")))
				}
				if("Health_Net_PPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "PPO")))
				}
			}
			if ("SHARP" %in% available_insurers) {
				# could be network 1 or 2
				if("Sharp1" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "SHARP" & plan_data$Network_num == 1 & 
							!is.na(plan_data$Network_num))))
				}
				if("Sharp2" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "SHARP" & plan_data$Network_num == 2 & 
							!is.na(plan_data$Network_num))))
				}
			}
			if (any(single_product_insurers %in% available_insurers)) {
				available_plans <- c(available_plans,
					intersect(region_year_plans,
						which(plan_data$Issuer_Name %in% intersect(available_insurers,single_product_insurers))))
			}
			
			households_to_update <- which(households$zip_region_year == i & !is.na(households$zip_region_year))
			available_plan_names <- unique(as.character(plan_data[available_plans,"Plan_Name_Small_NOHSACAT"]))
			choices[households_to_update,available_plan_names] <- 0
			
				if(t > 2014) {
					if(length(available_products) > length(lastyr_available_products)) {
						households[households_to_update,"change_in_products"] <- 1 # Net entry
					} else if (length(available_products) < length(lastyr_available_products)) {
						households[households_to_update,"change_in_products"] <- -1 # Net exit
					}
					
					if(length(available_insurers) > length(lastyr_available_insurers)) {
						households[households_to_update,"change_in_insurers"] <- 1 # Net entry
					} else if (length(available_insurers) < length(lastyr_available_insurers)) {
						households[households_to_update,"change_in_insurers"] <- -1 # Net exit
					}
					
					if(length(large_available_insurers) > length(lastyr_large_available_insurers)) {
						households[households_to_update,"change_in_large_insurers"] <- 1 # Net entry
					} else if (length(large_available_insurers) < length(lastyr_large_available_insurers)) {
						households[households_to_update,"change_in_large_insurers"] <- -1 # Net exit
					}
					
					lastyr_large_available_insurers
					
					number_in_market <- sum(households[!is.na(households$plan_number_nocsr) & households$zip_region_year == i &
							!is.na(households$zip_region_year),"weight"])
					
					if(number_in_market > 0) {
						households[households_to_update,"market_turnover"] <- 
							sum(households[is.na(households$previous_plan_number) & 
								!is.na(households$plan_number_nocsr) & households$zip_region_year == i &
								!is.na(households$zip_region_year),"weight"])/number_in_market
					}	
					
				}
			
			if(t > 2014) {
				if ("Anthem" %in% lastyr_available_insurers) {
					if("Anthem_HMO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "HMO" &
								plan_data$MSP == 0)))
					}
					if("Anthem_EPO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
								plan_data$MSP == 0)))
					}
					if("Anthem_PPO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
								plan_data$MSP == 0)))
					}
					if("Anthem_EPO_MSP" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
								plan_data$MSP == 1)))
					}
					if("Anthem_PPO_MSP" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
								plan_data$MSP == 1)))
					}
				} 
				if ("Blue_Shield" %in% lastyr_available_insurers) {
					# could be HMO, EPO, PPO
					if("Blue_Shield_HMO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "HMO" )))
					}
					if("Blue_Shield_EPO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "EPO" )))
					}
					if("Blue_Shield_PPO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "PPO" )))
					}
				} 
				if ("Health_Net" %in% lastyr_available_insurers) {
					# could be HMO, HSP, EPO, PPO
					if("Health_Net_HMO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HMO")))
					}
					if("Health_Net_HSP" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HSP")))
					}
					if("Health_Net_EPO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "EPO")))
					}
					if("Health_Net_PPO" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "PPO")))
					}
				}
				if ("SHARP" %in% lastyr_available_insurers) {
					# could be network 1 or 2
					if("Sharp1" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "SHARP" & plan_data$Network_num == 1 & 
								!is.na(plan_data$Network_num))))
					}
					if("Sharp2" %in% lastyr_available_products) {
						lastyr_available_plans <- c(lastyr_available_plans,
							intersect(lastyr_region_year_plans,
								which(plan_data$Issuer_Name == "SHARP" & plan_data$Network_num == 2 & 
								!is.na(plan_data$Network_num))))
					}
				}
				if (any(single_product_insurers %in% lastyr_available_insurers)) {
					lastyr_available_plans <- c(lastyr_available_plans,
						intersect(lastyr_region_year_plans,
							which(plan_data$Issuer_Name %in% intersect(lastyr_available_insurers,single_product_insurers))))
				}
				
				lastyr_available_plan_names <- unique(as.character(plan_data[lastyr_available_plans,"Plan_Name_Small_NOHSACAT"]))
				new_plans <- setdiff(available_plan_names,lastyr_available_plan_names)
				new_choices[households_to_update,new_plans] <- 1 

			}
	}
	
	# Restrictions on who can pick ACS plans
		# Must be subsidy eligible and have income below 250%
		# "Unenhanced silver" is not in choice set of ACS-eligibles
		# NOTE: this is not quite accurate if households have both subsidized and unsubsidized members
	
	acs_eligibles73 <- which(households$FPL > 2 & households$FPL <= 2.5 & households$subsidized_members > 0)
	acs_eligibles87 <- which(households$FPL > 1.5 & households$FPL <= 2 & households$subsidized_members > 0)
	acs_eligibles94 <- which(households$FPL <= 1.5 & households$subsidized_members > 0)
	
	# Household AV matrix 
		# Just the plan AV for non-silver plans
		# CSR plan AV for silver plans
	
	AV_matrix <- matrix(0,nrow=dim(households)[1],ncol=length(plan_names_nohsacat),dimnames=list(rownames(households),plan_names_nohsacat))	
	
	AV_matrix[,unique(plan_data[plan_data$metal_level %in% c("Bronze"),"Plan_Name_Small_NOHSACAT"])] <- 0.6
	AV_matrix[,unique(plan_data[plan_data$metal_level %in% c("Gold"),"Plan_Name_Small_NOHSACAT"])] <- 0.8
	AV_matrix[,unique(plan_data[plan_data$metal_level %in% c("Platinum"),"Plan_Name_Small_NOHSACAT"])] <- 0.9
	
	AV_matrix[,unique(plan_data[plan_data$metal_level %in% c("Silver"),"Plan_Name_Small_NOHSACAT"])] <- 0.7
	AV_matrix[acs_eligibles73,unique(plan_data[plan_data$metal_level %in% c("Silver"),"Plan_Name_Small_NOHSACAT"])] <- 0.73
	AV_matrix[acs_eligibles87,unique(plan_data[plan_data$metal_level %in% c("Silver"),"Plan_Name_Small_NOHSACAT"])] <- 0.87
	AV_matrix[acs_eligibles94,unique(plan_data[plan_data$metal_level %in% c("Silver"),"Plan_Name_Small_NOHSACAT"])] <- 0.94
		
	gc()
	                
		
	# Input choice
		# Complication with households choosing multiple plans
		# Use choice of household head
	
	plan_data <- rbind(plan_data,rep(NA,dim(plan_data)[2]))
	plan_data$Plan_Name_Small_NOHSACAT <- as.character(plan_data$Plan_Name_Small_NOHSACAT)
	plan_data[nrow(plan_data),"Plan_Name_Small_NOHSACAT"] <- "Uninsured"
	#plan_data[nrow(plan_data),"plan_unique_id"] <- "Uninsured"
	plan_data[nrow(plan_data),"plan_number_nohsacat"] <- ncol(choices) + 1
	plan_data[nrow(plan_data),"plan_number"] <- max(plan_data$plan_number,na.rm=T) + 1
	#rownames(plan_data)[nrow(plan_data)] <- "Uninsured"
	
	assign_head_plan <- function(x) {
		return(as.integer(x[1]))
	}
	
	assign_oldest <- function(x) {
		return(as.character(x[1]))
	}
	
	households$plan_number <- as.numeric(by(data[,"plan_number"],data[,"household_year"],assign_head_plan)[rownames(households)])
	households$plan_name <- by(data[,"plan_name"],data[,"household_year"],assign_oldest)[rownames(households)]
	households$plan_id <- as.numeric(by(data[,"plan_id"],data[,"household_year"],assign_head_plan)[rownames(households)])
	#households[households$plan_number == max(plan_data$plan_number),"plan_name"] <- "Uninsured"
	
	households$plan_number_nohsacat <- reference_numbers[households$plan_number]
	households[is.na(households$plan_id),"plan_number_nohsacat"] <- ncol(choices) + 1	
		
	choices <- cbind(choices,rep(0,nrow(choices)))
	colnames(choices) <- c(colnames(choices)[1:(ncol(choices)-1)],"Uninsured")
	choices[cbind(seq(nrow(choices)),households$plan_number_nohsacat)] <- 1
	gc()
	
	new_choices <- cbind(new_choices,rep(0,nrow(new_choices)))
	colnames(new_choices) <- c(colnames(new_choices)[1:(ncol(new_choices)-1)],"Uninsured")
	gc()
	
	AV_matrix <- cbind(AV_matrix,rep(0,nrow(AV_matrix)))
	colnames(AV_matrix) <- c(colnames(AV_matrix)[1:(ncol(AV_matrix)-1)],"Uninsured")
	gc()
	
	# Number of choices in household choice set (deduct 1 because of the uninsured option)
	households$num_choices <- rowSums(!is.na(choices)) - 1
	
	#write.csv(choices[,!colnames(choices) == "Uninsured"], file = "ca_choice_matrix_SEP012016.csv",row.names=FALSE)
	#rm(choices_julia)
	gc()
	
##### Previous plan choices

	# Previous plan choice is already in household object
	# You need to be careful with CSR plans (shift between CSR plans shouldn't count as change
	
	households$previous_plan_number_nohsacat <- reference_numbers[households$previous_plan_number]

	
	previous_choices <- matrix(0,nrow(choices),ncol(choices),dimnames=list(rownames(choices),colnames(choices)))
	
	for(j in unique(households$previous_plan_number_nohsacat)) {
	
		# Get households that chose j in previous year
		households_on_j <- which(households$previous_plan_number_nohsacat == j)
	
		# Get all plans that would be considered equivalent to j
		#j_plan_name <- colnames(choices)[j]
		#equivalent_plans <- unique(plan_data[plan_data$Plan_Name_Small_NOCSR == j_plan_name,"plan_number_small"])
	
		# Input previous choice (all equivalent plans get a 1, will eliminate extras in make.julia.V2.r)
		#previous_choices[households_on_j,equivalent_plans] <- 1
		previous_choices[households_on_j,j] <- 1
	}
	
	#previous_choices <- cbind(previous_choices,NA)
	#colnames(previous_choices)[ncol(previous_choices)] <- "Uninsured"
	
##### Premium Object

#choices <- get(load("ca_choice_matrix_FEB082016"))

	premiums <- choices
	
	catastrophic_plans <- which(plan_data$metal_level == "Minimum Coverage")
	
	# Choices for the exchange population for which we have 3-digit zip (plan has to be offered in year and zip)
	for(i in rownames(zip3_choices)) {
		
			region_year_plans <- which(plan_data$ENROLLMENT_YEAR == zip3_choices[i,"Year"] &
				plan_data$region == zip3_choices[i,"Region"])
		
			# Get the broad product categories available (insurer/network type/etc. - no metal)
			available_products <- zipchoices[i,]
			available_products <- names(available_products)[!is.na(available_products)]
			available_insurers <- unique(product_definitions[available_products,"insurer"])
			
			# Now get the specific plans available
			
			available_plans <- c()
			if ("Anthem" %in% available_insurers) {
				if("Anthem_HMO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "HMO" &
							plan_data$MSP == 0)))
				}
				if("Anthem_EPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
							plan_data$MSP == 0)))
				}
				if("Anthem_PPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
							plan_data$MSP == 0)))
				}
				if("Anthem_EPO_MSP" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "EPO" &
							plan_data$MSP == 1)))
				}
				if("Anthem_PPO_MSP" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Anthem" & plan_data$PLAN_NETWORK_TYPE == "PPO" &
							plan_data$MSP == 1)))
				}
			} 
			if ("Blue_Shield" %in% available_insurers) {
				# could be HMO, EPO, PPO
				if("Blue_Shield_HMO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "HMO" )))
				}
				if("Blue_Shield_EPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "EPO" )))
				}
				if("Blue_Shield_PPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Blue_Shield" & plan_data$PLAN_NETWORK_TYPE == "PPO" )))
				}
			} 
			if ("Health_Net" %in% available_insurers) {
				# could be HMO, HSP, EPO, PPO
				if("Health_Net_HMO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HMO")))
				}
				if("Health_Net_HSP" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "HSP")))
				}
				if("Health_Net_EPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "EPO")))
				}
				if("Health_Net_PPO" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "Health_Net" & plan_data$PLAN_NETWORK_TYPE == "PPO")))
				}
			}
			if ("SHARP" %in% available_insurers) {
				# could be network 1 or 2
				if("Sharp1" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "SHARP" & plan_data$Network_num == 1 & 
							!is.na(plan_data$Network_num))))
				}
				if("Sharp2" %in% available_products) {
					available_plans <- c(available_plans,
						intersect(region_year_plans,
							which(plan_data$Issuer_Name == "SHARP" & plan_data$Network_num == 2 & 
							!is.na(plan_data$Network_num))))
				}
			}
			if (any(single_product_insurers %in% available_insurers)) {
				available_plans <- c(available_plans,
					intersect(region_year_plans,
						which(plan_data$Issuer_Name %in% intersect(available_insurers,single_product_insurers))))
			}
			
			available_plans <- unique(available_plans)
			available_plan_names <- as.character(plan_data[available_plans,"Plan_Name_Small_NOHSACAT"])
			
			# These are the premiums before merging the small plans and HSA/catastrophic/bronze
			premium_plans <- plan_data[available_plans,"Premium"]/1.278
			names(premium_plans) <- available_plan_names
			
			
			# Here we need to do something about the small plans
					# Solution 1: take the minimum for small plans (what I do here)
					# Solution 2: average
					# NOTE: this ends up catching 2 Health Net plans (an HSP and PPO get merged) - no big deal
				
			# For bronze/hsa/cat - take minimum of the bronze plans	
				
				duplicate_plans <- unique(available_plan_names[duplicated(available_plan_names)])					
				for(s in duplicate_plans) {
					plan_indices <- which(available_plan_names == s)
					plans_to_merge <- available_plans[plan_indices]
					plans_to_merge <- setdiff(plans_to_merge,catastrophic_plans)
					premium_plans[plan_indices] <- min(plan_data[plans_to_merge,"Premium"]/1.278)
				}
			
			unique_available_plan_names <- which(!duplicated(available_plan_names))
			available_plan_names <- available_plan_names[unique_available_plan_names]
			premium_plans <- premium_plans[unique_available_plan_names]	
			
			households_to_update <- which(households$zip_region_year == i & !is.na(households$zip_region_year))
			
			premiums[households_to_update,available_plan_names] <- households[households_to_update,"rating_factor"] %*% t(premium_plans)
	}

	gc()
	
	# Insert Mandate Penalty as the Price of Being Uninsured
	# I am converting this penalty to a monthly basis 
	premiums[,"Uninsured"] <- households$penalty/12
	gc()
	
	# Apply Subsidy to all plans except catastrophic
	#catastrophic_plans <- unique(plan_data[plan_data$metal_level == "Minimum Coverage","Plan_Name_Small"])
	#noncatastrophic_plans <- which(!colnames(premiums) %in% catastrophic_plans)
	subsidies <- households$subsidy
	subsidies[is.na(subsidies)] <- 0
	
	unsubsidized_premiums <- premiums
	gc()
	premiums <- pmax(premiums - subsidies,0)
	gc()
	
	# Set premiums of plans not in choice set to NA
	premiums <- premiums * pmax(pmin(choices,1),1)	
	gc()
	subsidies <- unsubsidized_premiums - premiums
	gc()
	rm(unsubsidized_premiums)
	gc()
	subsidies[,ncol(subsidies)] <- 0
	gc()

	if(output_for_ashley) {
		save(premiums,file="ca_premium_matrix_AUG032019_ashley")		
		save(choices,file="ca_choice_matrix_AUG032019_ashley")	
		save(new_choices,file="ca_new_choice_matrix_AUG032019_ashley")	
		save(previous_choices,file="ca_previous_choice_matrix_AUG032019_ashley")	
		save(subsidies,file="ca_subsidy_matrix_AUG032019_ashley")
		save(AV_matrix,file="ca_AV_matrix_AUG032019_ashley")	
	} else {
		save(premiums,file="ca_premium_matrix_AUG032019_churn")		
		save(choices,file="ca_choice_matrix_AUG032019_churn")	
		save(new_choices,file="ca_new_choice_matrix_AUG032019_churn")	
		save(previous_choices,file="ca_previous_choice_matrix_AUG032019_churn")	
		save(subsidies,file="ca_subsidy_matrix_AUG032019_churn")
		save(AV_matrix,file="ca_AV_matrix_AUG032019_churn")	
	}	

	
rm(subsidies)
rm(premiums)
rm(previous_choices)
rm(new_choices)
gc()
	
##### Households Object

	compute.number.unique <- function(x) {
		return(length(unique(x)))
	}	

	#households[households$plan_name == "Uninsured","plan_name"] <- NA	
	households[is.na(households$previous_plan_offered),"previous_plan_offered"] <- 0	
	households$enrolled_beg_year <- as.numeric(is.na(households$previous_plan_offered) & (households$SEP == 0 & !is.na(households$SEP)))
	
	# Service Channel
		
		data$channel <- as.character(data$service_channel)
		data[data$channel %in% c("CIA","PBE"),"channel"] <- "Insurance Agent"
		data[data$channel %in% c("SCR","CEW","CEC"),"channel"] <- "Other Assistance"
		data[is.na(data$plan_id),"channel"] <- NA
	
		assign_service_channel <- function(x) {
			unique_channel <- as.character(unique(x))
			if(length(unique_channel) == 1) {
				return(unique_channel)
			} else if ("Insurance Agent" %in%  x) {
				return("Insurance Agent")
			} else if ("Other Assistance" %in%  x) {
				return("Other Assistance")
			} else {
				return("Unassisted")
			}
		}
	
		service_channels <- by(data[!is.na(data$channel),"channel"],data[!is.na(data$channel),"household_year"],assign_service_channel)
		households$channel <- NA
		households[names(service_channels),"channel"] <- service_channels 
	
	# Language spoken
	
		data$language <- as.character(data$language_spoken)
		data[data$language == "English","language"] <- "English"
		data[data$language == "Spanish","language"] <- "Spanish"
		data[!data$language %in% c("(nonres","English","Spanish"),"language"] <- "Other Language"
		data[data$language == "(nonres","language"] <- NA
	
		assign_language <- function(x) {
			unique_channel <- as.character(unique(x))
			if(length(unique_channel) == 1) {
				return(unique_channel)
			} else if ("English" %in%  x) {
				return("English")
			} else if ("Spanish" %in%  x) {
				return("Spanish")
			} else {
				return("Other Language")
			}
		}
	
		languages <- by(data[!is.na(data$language),"language"],data[!is.na(data$language),"household_year"],assign_language)
		households$language <- NA
		households[names(languages),"language"] <- languages 
	
	# Mean age
	mean_ages <- by(data$AGE,data$household_year,mean)
	households[names(mean_ages),"mean_age"] <- mean_ages
	
	# SEP - make sure no NA values (could be NA if uninsured)
	households[is.na(households$plan_id),"SEP"] <- 0
	
	# SLC_contribution - make sure no NA values
	households[is.na(households$SLC_contribution),"SLC_contribution"] <- 
		households[is.na(households$SLC_contribution),"premiumSLC"] 
	
	fields <- sort(c("plan_name","plan_id","household_id","plan_number","FPL","household_size","enrollees",
		"perc_0to17","perc_18to25","perc_26to34","perc_35to44","perc_45to54","perc_55to64","perc_65plus","perc_asian","perc_black","perc_hispanic","perc_other","perc_white", 
		"perc_male","zip3","zip_region_year","rating_area","year","rating_factor","subsidy","subsidized_members","premiumSLC","SLC_contribution",
		"penalty","weight","exempt.belowfiling","exempt.unaffordable","out_of_market",
		"dep_midyear","plan_number_nocsr","previous_plan_number","previous_plan_offered","SEP","enrolled_beg_year",
		"channel","language","mean_age","num_choices"))
		
	# Save Data
	households <- households[,fields]
	gc()
	
	if(output_for_ashley) {
		save(households,file="ca_household_data_for_ashley")	
		write.csv(households,file="ca_household_data_for_ashley.csv",row.names=FALSE)	
	} else {
		save(households,file="ca_household_characteristics_AUG032019_churn")	
		write.csv(households,file="ca_household_characteristics_AUG032019_churn.csv",row.names=FALSE)	
	}	
	